# nolint start
# Author: T. M. Jensen Ahokovi

# 1 · Setup -------------------------------------------------------------------

rm(list = ls())
set.seed(123)

library(tidyverse)  
library(haven)      
library(AER)        
library(clubSandwich)  
library(broom)      
library(boot)       
library(stargazer) 

# uncomment the line below and add working directory path between the quotation marks
# dir_master <- "ADD WORKING DIRECTORY HERE/"

dir_data <- file.path(dir_master, "Data/")
dir_code <- file.path(dir_master, "Do/")
dir_output <- file.path(dir_master, "Results/")
dir_charts <- file.path(dir_master, "Charts/")

# 2 · Load Data ---------------------------------------------------------------

#// Load the main dataset from Stata file
df_main <- read_dta(file.path(dir_data, "Processing/state_elections_panel_df5_20240731.dta"))

# 3 · Data Processing ---------------------------------------------------------

## 3.1 · Filter Dataset

#// Subset data to include only years 2020-2022
df_analysis <- df_main %>%
  filter(year >= 2020 & year <= 2022)

#// Get list of unique states for sensitivity analysis
states_unique <- unique(df_analysis$state)

# 4 · Helper Functions --------------------------------------------------------

#' Fit 2SLS model and extract coefficient with clustered standard errors
#' 
#' @param data Data frame for analysis
#' @param include_control Logical, whether to include norm_vote_top2_share_sd control
#' @return Named vector with estimate and standard error
fit_2sls_model <- function(data, include_control = FALSE) {
  
  if (include_control) {
    #// Model with control variable
    model_formula <- incumbent_vote_share_top2 ~ total_1k_muni_aid_per_resident + norm_vote_top2_share_sd |
                     reps_per_million + norm_vote_top2_share_sd
  } else {
    #// Baseline model without controls
    model_formula <- incumbent_vote_share_top2 ~ total_1k_muni_aid_per_resident | reps_per_million
  }
  
  #// Fit 2SLS model
  model_2sls <- ivreg(model_formula, data = data)
  
  #// Calculate clustered standard errors at state level
  vcov_cluster <- vcovCL(model_2sls, cluster = ~ state)
  
  #// Get coefficient test results with clustered standard errors
  coefs_results <- coeftest(model_2sls, vcov = vcov_cluster)
  
  #// Extract coefficient of interest and its standard error
  estimate <- coefs_results["total_1k_muni_aid_per_resident", "Estimate"]
  std_error <- coefs_results["total_1k_muni_aid_per_resident", "Std. Error"]
  
  return(c(estimate = estimate, std_error = std_error))
}

#' Create sensitivity analysis plot
#' 
#' @param results_df Data frame with State, Estimate, StdError columns
#' @param full_sample_estimate Numeric, the full sample coefficient estimate
#' @param plot_title Character, title for the plot
#' @param filename Character, filename for saving the plot
#' @return ggplot object
create_sensitivity_plot <- function(results_df, full_sample_estimate, plot_title = "", filename) {
  
  #// Calculate 95% confidence intervals
  results_df$LowerCI <- results_df$Estimate - 1.96 * results_df$StdError
  results_df$UpperCI <- results_df$Estimate + 1.96 * results_df$StdError
  
  #// Create plot
  plot_output <- ggplot(results_df, aes(x = State, y = Estimate)) +
    geom_point() +
    geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI), width = 0.2) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +  #// Add reference line at zero
    geom_hline(yintercept = full_sample_estimate, linetype = "dashed", color = "black") +  #// Add full sample estimate line
    scale_y_continuous(breaks = seq(floor(min(results_df$LowerCI)), 
                                  ceiling(max(results_df$UpperCI)), by = 1)) +  #// Set y-axis intervals to 1
    theme_bw() +
    labs(title = plot_title,
         x = "State Omitted",
         y = "Coefficient Estimate") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  #// Save plot
  ggsave(filename = file.path(dir_charts, filename),
         plot = plot_output, width = 10, height = 6, dpi = 300)
  
  return(plot_output)
}

# 5 · Analysis ----------------------------------------------------------------

## 5.1 · Initial Full Sample Models

#// Fit initial baseline 2SLS model on full sample
model_baseline_full <- ivreg(incumbent_vote_share_top2 ~ total_1k_muni_aid_per_resident | reps_per_million, 
                           data = df_analysis)
vcov_baseline_full <- vcovCL(model_baseline_full, cluster = ~ state)
coefs_baseline_full <- coeftest(model_baseline_full, vcov = vcov_baseline_full)

#// Extract full sample baseline coefficient for reference line
coef_baseline_full <- coefs_baseline_full["total_1k_muni_aid_per_resident", "Estimate"]

#// Fit initial model with control variable on full sample
model_control_full <- ivreg(incumbent_vote_share_top2 ~ total_1k_muni_aid_per_resident + norm_vote_top2_share_sd |
                          reps_per_million + norm_vote_top2_share_sd, data = df_analysis)
vcov_control_full <- vcovCL(model_control_full, cluster = ~ state)
coefs_control_full <- coeftest(model_control_full, vcov = vcov_control_full)

#// Extract full sample control model coefficient for reference line
coef_control_full <- coefs_control_full["total_1k_muni_aid_per_resident", "Estimate"]

## 5.2 · Leave-One-Out Sensitivity Analysis

### 5.2.1 · Baseline Model Sensitivity

#// Initialize storage for baseline model results
results_list_baseline <- list()

#// Loop over each state for baseline model sensitivity analysis
for (state in states_unique) {
  
  #// Create subset excluding current state
  df_temp <- df_analysis[df_analysis$state != state, ]
  
  #// Fit model and extract results
  temp_results <- fit_2sls_model(df_temp, include_control = FALSE)
  
  #// Store results
  results_list_baseline[[state]] <- temp_results
}

### 5.2.2 · Control Model Sensitivity

#// Initialize storage for control model results
results_list_control <- list()

#// Loop over each state for control model sensitivity analysis
for (state in states_unique) {
  
  #// Create subset excluding current state
  df_temp <- df_analysis[df_analysis$state != state, ]
  
  #// Fit model with control and extract results
  temp_results <- fit_2sls_model(df_temp, include_control = TRUE)
  
  #// Store results
  results_list_control[[state]] <- temp_results
}

# 6 · Results Combination -----------------------------------------------------

## 6.1 · Compile Baseline Model Results

#// Convert baseline results to data frame
results_matrix_baseline <- do.call(rbind, results_list_baseline)
colnames(results_matrix_baseline) <- c("Estimate", "StdError")
df_results_baseline <- data.frame(State = states_unique, results_matrix_baseline)

## 6.2 · Compile Control Model Results

#// Convert control model results to data frame
results_matrix_control <- do.call(rbind, results_list_control)
colnames(results_matrix_control) <- c("Estimate", "StdError")
df_results_control <- data.frame(State = states_unique, results_matrix_control)

# 7 · Output Generation -------------------------------------------------------

## 7.1 · Generate Baseline Model Plot

plot_baseline <- create_sensitivity_plot(
  results_df = df_results_baseline,
  full_sample_estimate = coef_baseline_full,  #// Use actual full sample estimate
  plot_title = "",
  filename = "2SLS_Estimates_by_State.png"
)

print(plot_baseline)

## 7.2 · Generate Control Model Plot

plot_control <- create_sensitivity_plot(
  results_df = df_results_control,
  full_sample_estimate = coef_control_full,  #// Use actual full sample estimate
  plot_title = "",
  filename = "2SLS_Estimates_with_Control_by_State.png"
)

print(plot_control)

## 7.3 · Display Results Summary

#// Print coefficient test results for full sample models
cat("Full Sample Baseline Model Results:\n")
print(coefs_baseline_full)

cat("\nFull Sample Control Model Results:\n")
print(coefs_control_full)

# nolint end